{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "work_dir=\"./datas/userInfo\"\n",
    "splits_dir=f\"{work_dir}/splits\"\n",
    "\n",
    "\n",
    "import os\n",
    "if not os.path.exists(splits_dir):\n",
    "    os.mkdir(splits_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "excel_names = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['41e5fa7ac45c42eabe98b5ec3f798a56-20200413162749-collected-info.xlsx',\n",
       " 'feb7aa6c34774c578abebee8f24125ca-20200413162826-collected-info.xlsx',\n",
       " 'c9c65b50c7d84f9b821955c85f7fe700-20200413162807-collected-info.xlsx',\n",
       " 'eeb35ea8214d4c45aafa62edff5a5a1d-20200413162306-collected-info.xlsx',\n",
       " '60c745ba057a41b58cdc2a90c968bd19-20200423160427-collected-info.xlsx',\n",
       " '8301c8776be64713be2f2d04cab24f7a-20200430152740-collected-info.xlsx',\n",
       " '0d4ba07cad5e4358995d587318b77182-20200413162755-collected-info.xlsx',\n",
       " 'f3661d83680b4d82b284d099c230db63-20200413162728-collected-info.xlsx',\n",
       " '1df56ce606d644e1a6a89c92bedb4b19-20200423160414-collected-info.xlsx',\n",
       " 'f50cbba23b8e48afa69be3cf0fdad74c-20200413162734-collected-info.xlsx',\n",
       " '32e2d319eccd4163a7ede315e61459e3-20200413162840-collected-info.xlsx',\n",
       " 'c55e264f592a4045a2fb3431b2e4bd4c-20200413162834-collected-info.xlsx',\n",
       " '3c2e2d25a6e443858a99c92a92ae7137-20200413162804-collected-info.xlsx']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "\n",
    "for excel_name in os.listdir(splits_dir):\n",
    "    excel_names.append(excel_name)\n",
    "excel_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_list = []\n",
    "\n",
    "for excel_name in excel_names:\n",
    "    # 读取每个excel到df\n",
    "    excel_path = f\"{splits_dir}/{excel_name}\"\n",
    "    df_split = pd.read_excel(excel_path)    \n",
    "    df_list.append(df_split)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_merged = pd.concat(df_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户微信名称</th>\n",
       "      <th>购买商品</th>\n",
       "      <th>购买数量</th>\n",
       "      <th>购买时间</th>\n",
       "      <th>城市</th>\n",
       "      <th>姓名</th>\n",
       "      <th>手机号</th>\n",
       "      <th>酒店名称</th>\n",
       "      <th>地区</th>\n",
       "      <th>职位</th>\n",
       "      <th>地区名称</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>潘慧????（心零售）</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-04-04 13:27:02</td>\n",
       "      <td>内蒙古自治区 巴彦淖尔市 乌拉特前旗</td>\n",
       "      <td>潘磊</td>\n",
       "      <td>1.559848e+10</td>\n",
       "      <td>皇嘉盛宴</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>黄勇军</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-31 22:49:35</td>\n",
       "      <td>湖北省 黄石市 黄石港区</td>\n",
       "      <td>黄勇军</td>\n",
       "      <td>1.867140e+10</td>\n",
       "      <td>黄石金花大酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>蒙古丽宫杨晓凤</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-31 11:50:52</td>\n",
       "      <td>内蒙古自治区 锡林郭勒盟 锡林浩特市</td>\n",
       "      <td>杨晓凤</td>\n",
       "      <td>1.564800e+10</td>\n",
       "      <td>蒙古丽宫酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>蓝天白云</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>未购买</td>\n",
       "      <td>2020-03-30 17:01:19</td>\n",
       "      <td>广东省 广州市 增城区</td>\n",
       "      <td>黄淑清</td>\n",
       "      <td>1.343429e+10</td>\n",
       "      <td>广州嘉力达酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>张波</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-29 21:11:18</td>\n",
       "      <td>四川省巴中市通江县</td>\n",
       "      <td>张波</td>\n",
       "      <td>1.318352e+10</td>\n",
       "      <td>沁源春酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>茶者</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-28 22:32:10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>张建坤</td>\n",
       "      <td>1.535056e+10</td>\n",
       "      <td>唐朝酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>河北省 邢台市 宁晋县</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>『Darling 菲』。</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-28 21:39:58</td>\n",
       "      <td>NaN</td>\n",
       "      <td>任菲</td>\n",
       "      <td>1.303141e+10</td>\n",
       "      <td>振头古镇</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>河北省 石家庄市 桥西区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>远疆海张敏</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-28 18:47:24</td>\n",
       "      <td>NaN</td>\n",
       "      <td>张敏</td>\n",
       "      <td>1.345862e+10</td>\n",
       "      <td>远疆海</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>四川省 成都市 武侯区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>一半灬</td>\n",
       "      <td>知人自知的系统思维</td>\n",
       "      <td>未购买</td>\n",
       "      <td>2020-04-08 14:13:09</td>\n",
       "      <td>NaN</td>\n",
       "      <td>测试</td>\n",
       "      <td>1.234568e+11</td>\n",
       "      <td>测试酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>小洁</td>\n",
       "      <td>知人自知的系统思维</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-04-04 00:05:42</td>\n",
       "      <td>NaN</td>\n",
       "      <td>封洁</td>\n",
       "      <td>1.733187e+10</td>\n",
       "      <td>河北今生缘酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>939 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          用户微信名称             购买商品 购买数量                 购买时间  \\\n",
       "0    潘慧????（心零售）  社区营销－3313计划（一期）    1  2020-04-04 13:27:02   \n",
       "1            黄勇军  社区营销－3313计划（一期）    1  2020-03-31 22:49:35   \n",
       "2        蒙古丽宫杨晓凤  社区营销－3313计划（一期）    1  2020-03-31 11:50:52   \n",
       "3           蓝天白云  社区营销－3313计划（一期）  未购买  2020-03-30 17:01:19   \n",
       "4             张波  社区营销－3313计划（一期）    1  2020-03-29 21:11:18   \n",
       "..           ...              ...  ...                  ...   \n",
       "33            茶者  社区营销－3313计划（二期）    1  2020-03-28 22:32:10   \n",
       "34  『Darling 菲』。  社区营销－3313计划（二期）    1  2020-03-28 21:39:58   \n",
       "35         远疆海张敏  社区营销－3313计划（二期）    1  2020-03-28 18:47:24   \n",
       "0            一半灬        知人自知的系统思维  未购买  2020-04-08 14:13:09   \n",
       "1             小洁        知人自知的系统思维    1  2020-04-04 00:05:42   \n",
       "\n",
       "                    城市   姓名           手机号     酒店名称   地区   职位          地区名称  \n",
       "0   内蒙古自治区 巴彦淖尔市 乌拉特前旗   潘磊  1.559848e+10     皇嘉盛宴  NaN  NaN           NaN  \n",
       "1         湖北省 黄石市 黄石港区  黄勇军  1.867140e+10  黄石金花大酒店  NaN  NaN           NaN  \n",
       "2   内蒙古自治区 锡林郭勒盟 锡林浩特市  杨晓凤  1.564800e+10   蒙古丽宫酒店  NaN  NaN           NaN  \n",
       "3          广东省 广州市 增城区  黄淑清  1.343429e+10  广州嘉力达酒店  NaN  NaN           NaN  \n",
       "4            四川省巴中市通江县   张波  1.318352e+10    沁源春酒店  NaN  NaN           NaN  \n",
       "..                 ...  ...           ...      ...  ...  ...           ...  \n",
       "33                 NaN  张建坤  1.535056e+10     唐朝酒店  NaN  NaN   河北省 邢台市 宁晋县  \n",
       "34                 NaN   任菲  1.303141e+10     振头古镇  NaN  NaN  河北省 石家庄市 桥西区  \n",
       "35                 NaN   张敏  1.345862e+10      远疆海  NaN  NaN   四川省 成都市 武侯区  \n",
       "0                  NaN   测试  1.234568e+11     测试酒店  NaN  NaN           NaN  \n",
       "1                  NaN   封洁  1.733187e+10  河北今生缘酒店  NaN  NaN           NaN  \n",
       "\n",
       "[939 rows x 11 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_merged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "duplicate_row = df_merged.duplicated(subset=['用户微信名称'],keep='first') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     False\n",
       "1     False\n",
       "2     False\n",
       "3     False\n",
       "4     False\n",
       "      ...  \n",
       "33    False\n",
       "34    False\n",
       "35     True\n",
       "0      True\n",
       "1      True\n",
       "Length: 939, dtype: bool"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "duplicate_row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_bool(x):\n",
    "    if x==True:\n",
    "        return False\n",
    "    return True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "juge = duplicate_row.apply(get_bool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "data=df_merged.loc[ juge,:].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户微信名称</th>\n",
       "      <th>购买商品</th>\n",
       "      <th>购买数量</th>\n",
       "      <th>购买时间</th>\n",
       "      <th>城市</th>\n",
       "      <th>姓名</th>\n",
       "      <th>手机号</th>\n",
       "      <th>酒店名称</th>\n",
       "      <th>地区</th>\n",
       "      <th>职位</th>\n",
       "      <th>地区名称</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>潘慧????（心零售）</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-04-04 13:27:02</td>\n",
       "      <td>内蒙古自治区 巴彦淖尔市 乌拉特前旗</td>\n",
       "      <td>潘磊</td>\n",
       "      <td>1.559848e+10</td>\n",
       "      <td>皇嘉盛宴</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>黄勇军</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-31 22:49:35</td>\n",
       "      <td>湖北省 黄石市 黄石港区</td>\n",
       "      <td>黄勇军</td>\n",
       "      <td>1.867140e+10</td>\n",
       "      <td>黄石金花大酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>蒙古丽宫杨晓凤</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-31 11:50:52</td>\n",
       "      <td>内蒙古自治区 锡林郭勒盟 锡林浩特市</td>\n",
       "      <td>杨晓凤</td>\n",
       "      <td>1.564800e+10</td>\n",
       "      <td>蒙古丽宫酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>蓝天白云</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>未购买</td>\n",
       "      <td>2020-03-30 17:01:19</td>\n",
       "      <td>广东省 广州市 增城区</td>\n",
       "      <td>黄淑清</td>\n",
       "      <td>1.343429e+10</td>\n",
       "      <td>广州嘉力达酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>张波</td>\n",
       "      <td>社区营销－3313计划（一期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-29 21:11:18</td>\n",
       "      <td>四川省巴中市通江县</td>\n",
       "      <td>张波</td>\n",
       "      <td>1.318352e+10</td>\n",
       "      <td>沁源春酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>孙毅.刀客15508138000</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-30 09:33:07</td>\n",
       "      <td>NaN</td>\n",
       "      <td>孙毅</td>\n",
       "      <td>1.550814e+10</td>\n",
       "      <td>时光美小蛙</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>广东省 广州市 天河区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>A罗曼婚礼 李俊</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-29 13:26:38</td>\n",
       "      <td>NaN</td>\n",
       "      <td>冯晶</td>\n",
       "      <td>1.517950e+10</td>\n",
       "      <td>壹家亲酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>江西省 宜春市 丰城市</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>陇峰酒家-李伦</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-28 23:30:37</td>\n",
       "      <td>NaN</td>\n",
       "      <td>李伦</td>\n",
       "      <td>1.809329e+10</td>\n",
       "      <td>陇峰酒家</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>甘肃省 定西市 陇西县</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>茶者</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-28 22:32:10</td>\n",
       "      <td>NaN</td>\n",
       "      <td>张建坤</td>\n",
       "      <td>1.535056e+10</td>\n",
       "      <td>唐朝酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>河北省 邢台市 宁晋县</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>『Darling 菲』。</td>\n",
       "      <td>社区营销－3313计划（二期）</td>\n",
       "      <td>1</td>\n",
       "      <td>2020-03-28 21:39:58</td>\n",
       "      <td>NaN</td>\n",
       "      <td>任菲</td>\n",
       "      <td>1.303141e+10</td>\n",
       "      <td>振头古镇</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>河北省 石家庄市 桥西区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>741 rows × 11 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              用户微信名称             购买商品 购买数量                 购买时间  \\\n",
       "0        潘慧????（心零售）  社区营销－3313计划（一期）    1  2020-04-04 13:27:02   \n",
       "1                黄勇军  社区营销－3313计划（一期）    1  2020-03-31 22:49:35   \n",
       "2            蒙古丽宫杨晓凤  社区营销－3313计划（一期）    1  2020-03-31 11:50:52   \n",
       "3               蓝天白云  社区营销－3313计划（一期）  未购买  2020-03-30 17:01:19   \n",
       "4                 张波  社区营销－3313计划（一期）    1  2020-03-29 21:11:18   \n",
       "..               ...              ...  ...                  ...   \n",
       "29  孙毅.刀客15508138000  社区营销－3313计划（二期）    1  2020-03-30 09:33:07   \n",
       "30          A罗曼婚礼 李俊  社区营销－3313计划（二期）    1  2020-03-29 13:26:38   \n",
       "31           陇峰酒家-李伦  社区营销－3313计划（二期）    1  2020-03-28 23:30:37   \n",
       "33                茶者  社区营销－3313计划（二期）    1  2020-03-28 22:32:10   \n",
       "34      『Darling 菲』。  社区营销－3313计划（二期）    1  2020-03-28 21:39:58   \n",
       "\n",
       "                    城市   姓名           手机号     酒店名称   地区   职位          地区名称  \n",
       "0   内蒙古自治区 巴彦淖尔市 乌拉特前旗   潘磊  1.559848e+10     皇嘉盛宴  NaN  NaN           NaN  \n",
       "1         湖北省 黄石市 黄石港区  黄勇军  1.867140e+10  黄石金花大酒店  NaN  NaN           NaN  \n",
       "2   内蒙古自治区 锡林郭勒盟 锡林浩特市  杨晓凤  1.564800e+10   蒙古丽宫酒店  NaN  NaN           NaN  \n",
       "3          广东省 广州市 增城区  黄淑清  1.343429e+10  广州嘉力达酒店  NaN  NaN           NaN  \n",
       "4            四川省巴中市通江县   张波  1.318352e+10    沁源春酒店  NaN  NaN           NaN  \n",
       "..                 ...  ...           ...      ...  ...  ...           ...  \n",
       "29                 NaN   孙毅  1.550814e+10    时光美小蛙  NaN  NaN   广东省 广州市 天河区  \n",
       "30                 NaN   冯晶  1.517950e+10    壹家亲酒店  NaN  NaN   江西省 宜春市 丰城市  \n",
       "31                 NaN   李伦  1.809329e+10     陇峰酒家  NaN  NaN   甘肃省 定西市 陇西县  \n",
       "33                 NaN  张建坤  1.535056e+10     唐朝酒店  NaN  NaN   河北省 邢台市 宁晋县  \n",
       "34                 NaN   任菲  1.303141e+10     振头古镇  NaN  NaN  河北省 石家庄市 桥西区  \n",
       "\n",
       "[741 rows x 11 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.drop(['购买数量','购买商品','购买时间'],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>用户微信名称</th>\n",
       "      <th>城市</th>\n",
       "      <th>姓名</th>\n",
       "      <th>手机号</th>\n",
       "      <th>酒店名称</th>\n",
       "      <th>地区</th>\n",
       "      <th>职位</th>\n",
       "      <th>地区名称</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>潘慧????（心零售）</td>\n",
       "      <td>内蒙古自治区 巴彦淖尔市 乌拉特前旗</td>\n",
       "      <td>潘磊</td>\n",
       "      <td>1.559848e+10</td>\n",
       "      <td>皇嘉盛宴</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>黄勇军</td>\n",
       "      <td>湖北省 黄石市 黄石港区</td>\n",
       "      <td>黄勇军</td>\n",
       "      <td>1.867140e+10</td>\n",
       "      <td>黄石金花大酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>蒙古丽宫杨晓凤</td>\n",
       "      <td>内蒙古自治区 锡林郭勒盟 锡林浩特市</td>\n",
       "      <td>杨晓凤</td>\n",
       "      <td>1.564800e+10</td>\n",
       "      <td>蒙古丽宫酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>蓝天白云</td>\n",
       "      <td>广东省 广州市 增城区</td>\n",
       "      <td>黄淑清</td>\n",
       "      <td>1.343429e+10</td>\n",
       "      <td>广州嘉力达酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>张波</td>\n",
       "      <td>四川省巴中市通江县</td>\n",
       "      <td>张波</td>\n",
       "      <td>1.318352e+10</td>\n",
       "      <td>沁源春酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>孙毅.刀客15508138000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>孙毅</td>\n",
       "      <td>1.550814e+10</td>\n",
       "      <td>时光美小蛙</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>广东省 广州市 天河区</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>A罗曼婚礼 李俊</td>\n",
       "      <td>NaN</td>\n",
       "      <td>冯晶</td>\n",
       "      <td>1.517950e+10</td>\n",
       "      <td>壹家亲酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>江西省 宜春市 丰城市</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>陇峰酒家-李伦</td>\n",
       "      <td>NaN</td>\n",
       "      <td>李伦</td>\n",
       "      <td>1.809329e+10</td>\n",
       "      <td>陇峰酒家</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>甘肃省 定西市 陇西县</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>茶者</td>\n",
       "      <td>NaN</td>\n",
       "      <td>张建坤</td>\n",
       "      <td>1.535056e+10</td>\n",
       "      <td>唐朝酒店</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>河北省 邢台市 宁晋县</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>『Darling 菲』。</td>\n",
       "      <td>NaN</td>\n",
       "      <td>任菲</td>\n",
       "      <td>1.303141e+10</td>\n",
       "      <td>振头古镇</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>河北省 石家庄市 桥西区</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>741 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              用户微信名称                  城市   姓名           手机号     酒店名称   地区  \\\n",
       "0        潘慧????（心零售）  内蒙古自治区 巴彦淖尔市 乌拉特前旗   潘磊  1.559848e+10     皇嘉盛宴  NaN   \n",
       "1                黄勇军        湖北省 黄石市 黄石港区  黄勇军  1.867140e+10  黄石金花大酒店  NaN   \n",
       "2            蒙古丽宫杨晓凤  内蒙古自治区 锡林郭勒盟 锡林浩特市  杨晓凤  1.564800e+10   蒙古丽宫酒店  NaN   \n",
       "3               蓝天白云         广东省 广州市 增城区  黄淑清  1.343429e+10  广州嘉力达酒店  NaN   \n",
       "4                 张波           四川省巴中市通江县   张波  1.318352e+10    沁源春酒店  NaN   \n",
       "..               ...                 ...  ...           ...      ...  ...   \n",
       "29  孙毅.刀客15508138000                 NaN   孙毅  1.550814e+10    时光美小蛙  NaN   \n",
       "30          A罗曼婚礼 李俊                 NaN   冯晶  1.517950e+10    壹家亲酒店  NaN   \n",
       "31           陇峰酒家-李伦                 NaN   李伦  1.809329e+10     陇峰酒家  NaN   \n",
       "33                茶者                 NaN  张建坤  1.535056e+10     唐朝酒店  NaN   \n",
       "34      『Darling 菲』。                 NaN   任菲  1.303141e+10     振头古镇  NaN   \n",
       "\n",
       "     职位          地区名称  \n",
       "0   NaN           NaN  \n",
       "1   NaN           NaN  \n",
       "2   NaN           NaN  \n",
       "3   NaN           NaN  \n",
       "4   NaN           NaN  \n",
       "..  ...           ...  \n",
       "29  NaN   广东省 广州市 天河区  \n",
       "30  NaN   江西省 宜春市 丰城市  \n",
       "31  NaN   甘肃省 定西市 陇西县  \n",
       "33  NaN   河北省 邢台市 宁晋县  \n",
       "34  NaN  河北省 石家庄市 桥西区  \n",
       "\n",
       "[741 rows x 8 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "data.to_excel(f\"{work_dir}/user.xlsx\",index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
